Graphical inspection
plotDispEsts(dds, main="Dispersion plot")

rld <- rlogTransformation(dds)
head(assay(rld))
## SRR24750870 SRR24750872 SRR24750875 SRR24750884 SRR24750886
## ENSG00000000003.16 6.329671 6.051057 6.267613 6.779514 6.176066
## ENSG00000000005.6 4.559225 2.904288 3.349846 2.762746 3.672551
## ENSG00000000419.14 8.591156 8.770587 9.307182 8.997897 9.203417
## ENSG00000000457.14 7.589054 7.869972 8.058020 8.274363 7.901477
## ENSG00000000460.17 6.126614 5.272208 5.909147 6.000162 5.593122
## ENSG00000000938.13 5.477545 6.233680 6.394446 5.567846 8.023214
## SRR24750888 SRR24750890 SRR24750891 SRR24750893 SRR24750895
## ENSG00000000003.16 6.244494 6.493658 6.119276 6.762724 6.592733
## ENSG00000000005.6 3.090547 3.234220 2.902277 3.313863 2.884141
## ENSG00000000419.14 9.171032 8.740413 9.324809 8.656339 8.704264
## ENSG00000000457.14 8.053547 7.691997 7.808946 7.911316 7.842755
## ENSG00000000460.17 6.169462 5.536892 6.231253 5.722466 6.324115
## ENSG00000000938.13 5.491031 5.875441 5.238462 5.942633 5.990384
## SRR24750897 SRR24750899 SRR24750902 SRR25143906 SRR25143907
## ENSG00000000003.16 6.489076 6.625695 6.098108 6.397513 6.927003
## ENSG00000000005.6 3.969479 3.255026 5.183687 6.139530 6.525705
## ENSG00000000419.14 8.608024 8.932316 8.694287 8.795892 8.952481
## ENSG00000000457.14 7.737008 7.944524 8.048814 7.972602 7.690648
## ENSG00000000460.17 6.052215 5.322183 5.690707 6.500251 6.008938
## ENSG00000000938.13 6.434979 6.320757 5.837344 6.774240 6.796708
## SRR25143908 SRR25143909 SRR25143910 SRR25143911 SRR25143912
## ENSG00000000003.16 6.451050 6.820612 6.406500 6.557796 6.592475
## ENSG00000000005.6 6.840662 3.447859 3.260614 3.625384 4.821752
## ENSG00000000419.14 8.936241 8.954982 9.018376 9.017794 8.982253
## ENSG00000000457.14 7.744739 7.939098 7.941927 7.745713 7.639098
## ENSG00000000460.17 6.421365 6.514294 6.369991 6.308550 6.132154
## ENSG00000000938.13 6.304516 6.216860 6.942990 6.475628 5.669449
## SRR25143913
## ENSG00000000003.16 6.602341
## ENSG00000000005.6 3.317934
## ENSG00000000419.14 8.879044
## ENSG00000000457.14 8.056516
## ENSG00000000460.17 6.420569
## ENSG00000000938.13 6.416174
mycols <- brewer.pal(8, "Dark2")[1:length(unique(samples_TA$health_state))]
sampleDists <- as.matrix(dist(t(assay(rld))))
heatmap.2(as.matrix(sampleDists), key=F, trace="none",
col=colorpanel(100, "black", "white"),
ColSideColors=mycols[samples_TA$health_state],
RowSideColors=mycols[samples_TA$health_state],
margin=c(10, 10), main="Sample Distance Matrix")
legend(x=0,y=1.1, legend = levels(samples_TA$health_state), fill = mycols, title = "Categories",bty='n')

selected <- order(rowMeans(counts(dds,normalized=TRUE)),
decreasing=TRUE)[1:20]
df <- as.data.frame(colData(dds)[,c("health_state", "sex")])
pheatmap(assay(rld)[selected,], cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=FALSE, annotation_col=df)

DESeq2::plotPCA(rld, intgroup=c("health_state"))
## using ntop=500 top features by variance

pcaData <- plotPCA(rld, intgroup=c("health_state",'sex'), returnData=TRUE)
## using ntop=500 top features by variance
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=health_state, shape=sex)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()

# hist(res$pvalue, breaks=50, col="grey")
# DESeq2::plotMA(dds, ylim=c(-1,1))
#
# # Volcano plot
# with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-2.5,2)))
# with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
fig <- plot_ly(x = res$pvalue, type = "histogram")
fig <- fig %>% layout(title = "Histogram of p-values",
xaxis = list(title = "Value"),
yaxis = list(title = "Frequency"))
fig
## Warning: Ignoring 91 observations
easyMAplot(res, output_shiny=FALSE)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")
res <- res[order(-abs(res$log2FoldChange)), ]
resFiltered <- resDF %>% dplyr::filter(abs(log2FoldChange)>1.5,padj<0.05)
resDF$ensembl_gene_id <- gsub("\\.\\d+$", "", resDF$ensembl_gene_id)
gene_info <- getBM(c("ensembl_gene_id","hgnc_symbol","entrezgene_id","description","entrezgene_description"), "ensembl_gene_id", resDF$ensembl_gene_id, ensembl) |> group_by(ensembl_gene_id) |> dplyr::slice_head(n = 1) |> ungroup()
resDF <- resDF %>%
left_join(gene_info, by = "ensembl_gene_id")
res1111 <- resDF %>% dplyr::filter(padj<0.05)
top_peaks <- as.data.frame(head(resDF,10))
a <- list()
for (i in seq_len(nrow(top_peaks))) {
m <- top_peaks[i, ]
annot = ifelse(m[["hgnc_symbol"]]=="", "novel", m[["hgnc_symbol"]])
ax <- sample(c(-1, 1), 1) * runif(1, 5, 60)
ay <- sample(c(-1, 1), 1) * runif(1, 5, 40)
a[[i]] <- list(
x = m[["log2FoldChange"]],
y = -log10(m[["pvalue"]]),
text = annot,
xref = "x",
yref = "y",
showarrow = TRUE,
arrowhead = 0.5,
ax = ax,
ay = ay
)
}
easyVolcano(res, fccut = 2, output_shiny=FALSE) %>% layout(annotations = a)
## Warning: Ignoring 91 observations
data(kegg.sets.hs)
data(sigmet.idx.hs)
kegg.sets.hs <- kegg.sets.hs[sigmet.idx.hs]
foldchanges <- resDF$log2FoldChange
names(foldchanges) <- resDF$entrezgene_id
keggres <- gage(foldchanges, gsets=kegg.sets.hs, same.dir=FALSE)
lapply(keggres, head)
## $greater
## p.geomean stat.mean
## hsa04512 ECM-receptor interaction 0.0003702404 3.441015
## hsa04610 Complement and coagulation cascades 0.0005943481 3.328515
## hsa04974 Protein digestion and absorption 0.0017050649 2.979736
## hsa04514 Cell adhesion molecules (CAMs) 0.0021366136 2.884320
## hsa04115 p53 signaling pathway 0.0403289882 1.760560
## hsa04145 Phagosome 0.0629244296 1.535402
## p.val q.val set.size
## hsa04512 ECM-receptor interaction 0.0003702404 0.04754784 85
## hsa04610 Complement and coagulation cascades 0.0005943481 0.04754784 56
## hsa04974 Protein digestion and absorption 0.0017050649 0.08546454 70
## hsa04514 Cell adhesion molecules (CAMs) 0.0021366136 0.08546454 123
## hsa04115 p53 signaling pathway 0.0403289882 1.00000000 67
## hsa04145 Phagosome 0.0629244296 1.00000000 138
## exp1
## hsa04512 ECM-receptor interaction 0.0003702404
## hsa04610 Complement and coagulation cascades 0.0005943481
## hsa04974 Protein digestion and absorption 0.0017050649
## hsa04514 Cell adhesion molecules (CAMs) 0.0021366136
## hsa04115 p53 signaling pathway 0.0403289882
## hsa04145 Phagosome 0.0629244296
##
## $stats
## stat.mean exp1
## hsa04512 ECM-receptor interaction 3.441015 3.441015
## hsa04610 Complement and coagulation cascades 3.328515 3.328515
## hsa04974 Protein digestion and absorption 2.979736 2.979736
## hsa04514 Cell adhesion molecules (CAMs) 2.884320 2.884320
## hsa04115 p53 signaling pathway 1.760560 1.760560
## hsa04145 Phagosome 1.535402 1.535402
keggrespathwaysUp <- data.frame(id=rownames(keggres$greater), keggres$greater) %>%
tibble::as_tibble() %>%
dplyr::filter(row_number()<=6) %>%
.$id %>%
as.character()
keggrespathwaysUp
## [1] "hsa04512 ECM-receptor interaction"
## [2] "hsa04610 Complement and coagulation cascades"
## [3] "hsa04974 Protein digestion and absorption"
## [4] "hsa04514 Cell adhesion molecules (CAMs)"
## [5] "hsa04115 p53 signaling pathway"
## [6] "hsa04145 Phagosome"
keggresids <- substr(c(keggrespathwaysUp), start=1, stop=8)
keggresids
## [1] "hsa04512" "hsa04610" "hsa04974" "hsa04514" "hsa04115" "hsa04145"
plot_pathway <- function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="hsa", new.signature=FALSE)
#detach("package:dplyr", unload=T)
tmp <- sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="hsa"))
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory D:/VLTA
## Info: Writing image file hsa04512.pathview.png
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory D:/VLTA
## Info: Writing image file hsa04610.pathview.png
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory D:/VLTA
## Info: Writing image file hsa04974.pathview.png
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory D:/VLTA
## Info: Writing image file hsa04514.pathview.png
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory D:/VLTA
## Info: Writing image file hsa04115.pathview.png
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory D:/VLTA
## Info: Writing image file hsa04145.pathview.png
resDF <- resDF %>% mutate(diffexpressed = dplyr::case_when(
log2FoldChange > 0 & padj < 0.05 ~ 'UP',
log2FoldChange < 0 & padj < 0.05 ~ 'DOWN',
padj > 0.05 ~ 'NO'
))
original_gene_list <- resDF$log2FoldChange
names(original_gene_list) <- resDF$ensembl_gene_id
gene_list<-na.omit(original_gene_list)
gene_list = sort(gene_list, decreasing = TRUE)
gse <- gseGO(geneList=gene_list,
ont ="ALL",
keyType = "ENSEMBL",
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = org.Hs.eg.db,
pAdjustMethod = "none")
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (12.01% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
dotplot(gse, showCategory=10, font.size=7, split=".sign") + facet_grid(.~.sign)

#emapplot(gse, showCategory = 10)
websiteLive <- getOption("enrichR.live")
if (websiteLive) {
listEnrichrSites()
setEnrichrSite("Enrichr") # Human genes
}
## Enrichr ... Connection is Live!
## FlyEnrichr ... Connection is Live!
## WormEnrichr ... Connection is Live!
## YeastEnrichr ... Connection is Live!
## FishEnrichr ... Connection is Live!
## OxEnrichr ... Connection is Live!
## Connection changed to https://maayanlab.cloud/Enrichr/
## Connection is Live!
if (websiteLive) dbs <- listEnrichrDbs()
dbs <- c("GO_Molecular_Function_2023", "GO_Cellular_Component_2023", "GO_Biological_Process_2023","GO_Molecular_Function_2018", "GO_Cellular_Component_2018", "GO_Biological_Process_2018")
if (websiteLive) {
enriched <- enrichr(names(gene_list), dbs)
}
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2023... Done.
## Querying GO_Cellular_Component_2023... Done.
## Querying GO_Biological_Process_2023... Done.
## Querying GO_Molecular_Function_2018... Done.
## Querying GO_Cellular_Component_2018... Done.
## Querying GO_Biological_Process_2018... Done.
## Parsing results... Done.
df <- resDF |> dplyr::filter(padj<0.05)
up_regulated <- df |> dplyr::filter(log2FoldChange > 0) |> dplyr::mutate(ensembl_gene_id = gsub("\\.\\d+$", "", ensembl_gene_id)) |> dplyr::select(ensembl_gene_id)
write.table(up_regulated, 'up_DMD_healthy.txt', row.names = FALSE, col.names = FALSE, quote = FALSE)
down_regulated <- df |> dplyr::filter(log2FoldChange < 0) |> dplyr::mutate(ensembl_gene_id = gsub("\\.\\d+$", "", ensembl_gene_id)) |> dplyr::select(ensembl_gene_id)
write.table(down_regulated, 'down_DMD_healthy.txt', row.names = FALSE, col.names = FALSE, quote = FALSE)
# Choose databases
dbs <- c("GO_Biological_Process_2021", "KEGG_2021_Human", "WikiPathways_2019_Human", "Jensen_DISEASES")
genes <- names(gene_list)
# Run enrichment analysis
enriched_results <- enrichr(genes, dbs)
## Uploading data to Enrichr... Done.
## Querying GO_Biological_Process_2021... Done.
## Querying KEGG_2021_Human... Done.
## Querying WikiPathways_2019_Human... Done.
## Querying Jensen_DISEASES... Done.
## Parsing results... Done.
# Access results for a specific database
go_results <- enriched_results[["GO_Biological_Process_2021"]]
kegg_results <- enriched_results[["KEGG_2021_Human"]]
egoCC <- enrichGO(gene = as.character(gene_info$entrezgene_id),
#universe = as.character(universe$entrezgene_id),
OrgDb = org.Hs.eg.db,
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.1,
readable = TRUE)
egoCCDF <- egoCC@result
write.table(egoCCDF, 'CC_DMD_H.csv')
egoBP <- enrichGO(gene = as.character(gene_info$entrezgene_id),
OrgDb = org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.1,
readable = TRUE)
egoBPBP <- egoBP@result
write.table(egoBPBP, 'BP_DMD_H.csv')
sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=Polish_Poland.utf8 LC_CTYPE=Polish_Poland.utf8
## [3] LC_MONETARY=Polish_Poland.utf8 LC_NUMERIC=C
## [5] LC_TIME=Polish_Poland.utf8
##
## time zone: Europe/Warsaw
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] txdbmaker_1.0.0 tximport_1.32.0
## [3] gageData_2.42.0 gage_2.54.0
## [5] pathview_1.44.0 org.Hs.eg.db_3.19.1
## [7] ggrepel_0.9.5 enrichplot_1.24.0
## [9] easylabel_0.2.8 pheatmap_1.0.12
## [11] plotly_4.10.4 ggplot2_3.5.1
## [13] gplots_3.1.3.1 RColorBrewer_1.1-3
## [15] DOSE_3.30.1 clusterProfiler_4.12.0
## [17] biomaRt_2.60.0 ensembldb_2.28.0
## [19] AnnotationFilter_1.28.0 DESeq2_1.44.0
## [21] SummarizedExperiment_1.34.0 MatrixGenerics_1.16.0
## [23] matrixStats_1.3.0 enrichR_3.2
## [25] readr_2.1.5 GenomicFeatures_1.56.0
## [27] AnnotationDbi_1.66.0 Biobase_2.64.0
## [29] GenomicRanges_1.56.1 GenomeInfoDb_1.40.1
## [31] IRanges_2.38.0 S4Vectors_0.42.0
## [33] BiocGenerics_0.50.0 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] later_1.3.2 splines_4.4.1 BiocIO_1.14.0
## [4] bitops_1.0-7 ggplotify_0.1.2 filelock_1.0.3
## [7] tibble_3.2.1 polyclip_1.10-6 graph_1.82.0
## [10] XML_3.99-0.16.1 lifecycle_1.0.4 httr2_1.0.1
## [13] lattice_0.22-6 MASS_7.3-60.2 crosstalk_1.2.1
## [16] magrittr_2.0.3 sass_0.4.9 rmarkdown_2.27
## [19] jquerylib_0.1.4 yaml_2.3.8 httpuv_1.6.15
## [22] cowplot_1.1.3 DBI_1.2.3 abind_1.4-5
## [25] zlibbioc_1.50.0 purrr_1.0.2 ggraph_2.2.1
## [28] RCurl_1.98-1.14 yulab.utils_0.1.4 WriteXLS_6.6.0
## [31] tweenr_2.0.3 rappdirs_0.3.3 GenomeInfoDbData_1.2.12
## [34] tidytree_0.4.6 codetools_0.2-20 DelayedArray_0.30.1
## [37] DT_0.33 xml2_1.3.6 ggforce_0.4.2
## [40] tidyselect_1.2.1 aplot_0.2.3 UCSC.utils_1.0.0
## [43] farver_2.1.2 viridis_0.6.5 BiocFileCache_2.12.0
## [46] GenomicAlignments_1.40.0 jsonlite_1.8.8 tidygraph_1.3.1
## [49] tools_4.4.1 progress_1.2.3 treeio_1.28.0
## [52] Rcpp_1.0.12 glue_1.7.0 gridExtra_2.3
## [55] SparseArray_1.4.8 xfun_0.44 qvalue_2.36.0
## [58] withr_3.0.0 fastmap_1.2.0 fansi_1.0.6
## [61] caTools_1.18.2 digest_0.6.35 mime_0.12
## [64] R6_2.5.1 gridGraphics_0.5-1 colorspace_2.1-0
## [67] GO.db_3.19.1 gtools_3.9.5 RSQLite_2.3.7
## [70] utf8_1.2.4 tidyr_1.3.1 generics_0.1.3
## [73] data.table_1.15.4 rtracklayer_1.64.0 htmlwidgets_1.6.4
## [76] prettyunits_1.2.0 graphlayouts_1.1.1 httr_1.4.7
## [79] S4Arrays_1.4.1 scatterpie_0.2.3 pkgconfig_2.0.3
## [82] gtable_0.3.5 blob_1.2.4 XVector_0.44.0
## [85] shadowtext_0.1.3 htmltools_0.5.8.1 fgsea_1.30.0
## [88] ProtGenerics_1.36.0 scales_1.3.0 png_0.1-8
## [91] ggfun_0.1.5 knitr_1.47 rstudioapi_0.16.0
## [94] tzdb_0.4.0 reshape2_1.4.4 rjson_0.2.21
## [97] nlme_3.1-164 curl_5.2.1 cachem_1.1.0
## [100] stringr_1.5.1 KernSmooth_2.23-24 shinycssloaders_1.0.0
## [103] parallel_4.4.1 HDO.db_0.99.1 restfulr_0.0.15
## [106] pillar_1.9.0 grid_4.4.1 vctrs_0.6.5
## [109] promises_1.3.0 dbplyr_2.5.0 xtable_1.8-4
## [112] Rgraphviz_2.48.0 KEGGgraph_1.64.0 evaluate_0.24.0
## [115] cli_3.6.2 locfit_1.5-9.9 compiler_4.4.1
## [118] Rsamtools_2.20.0 rlang_1.1.4 crayon_1.5.3
## [121] labeling_0.4.3 plyr_1.8.9 fs_1.6.4
## [124] stringi_1.8.4 viridisLite_0.4.2 BiocParallel_1.38.0
## [127] munsell_0.5.1 Biostrings_2.72.1 lazyeval_0.2.2
## [130] GOSemSim_2.30.0 Matrix_1.7-0 hms_1.1.3
## [133] patchwork_1.2.0 bit64_4.0.5 shiny_1.8.1.1
## [136] KEGGREST_1.44.1 highr_0.11 igraph_2.0.3
## [139] memoise_2.0.1 bslib_0.7.0 ggtree_3.12.0
## [142] fastmatch_1.1-4 bit_4.0.5 shinybusy_0.3.3
## [145] ape_5.8 gson_0.1.0